# Replication Archive for: Aggarwal, Minali, Jennifer Allen, Alex Coppock, 
# Dan Frankowski, Sol Messing, Kelly Zhang, James Barnes, Andrew Beasley, 
# Harry Hantman, and Sylvan Zheng: The impact of digital campaign advertising 
# during the 2020 US presidential election: evidence from survey experiments, 
# field experiments, and a campaign-level experiment. 
# Nature Human Behavior, forthcoming.

library(tidyverse)
library(estimatr)
library(car)
# library(lmtest)

# load data and uncount
aggregated_set <- read_rds(file = "aggregated_analysis_set.rds")
main_analysis_set <- uncount(aggregated_set, weights = n)


# We find evidence of differential turnout effects by modeled level of Trump support: the campaign increased voting among Biden leaners by 0.4 percentage points (SE: 0.2pp) and decreased voting among Trump leaners by 0.3 percentage points (SE: 0.3pp), for a difference of 0.7 points that is just distinguishable from zero ($t(1035571) = -2.09, $p = 0.036$, $\hat{difference-in-cates} = 0.7$ points, 95\% CI = $[-0.014, -0.00]$)

difference_in_cates <-
  lm_robust(
    formula = voted_in_2020 ~ condition*tss_bucket + num_times_voted + tss_100 + pts_100,
    weights = ipw,
    data = filter(main_analysis_set, tss_bucket %in% c("30 to 40", "60 to 70"))
  )%>% 
  tidy()



# According to our pre-registered regression specification, we find that the campaign increased voting among Biden leaners (those with modeled Trump support scores between 30 and 40) by 0.4 percentage points (t(522913) = 1.61, p = 0.11, CATE = 0.004, 95% CI: [-0.001, 0.008]) and decreased voting among Trump leaners (Trump support score between 60 and 70) by 0.3 percentage points (t(512655) = 1.61, p = 0.17, CATE = −0.003, 95% CI: [-0.008, 0.001]).

fit_biden <-
  lm_robust(
    formula = voted_in_2020 ~ condition + num_times_voted + tss_100 + pts_100,
    weights = ipw,
    data = filter(main_analysis_set, tss_bucket %in% c("30 to 40"))
  ) %>%
  tidy()

# ($t(522913) = 1.61$, $p = 0.11$, $\hat{CATE} = 0.004$, 95\% CI: [-0.001, 0.008])


fit_trump <-
  lm_robust(
    formula = voted_in_2020 ~ condition + num_times_voted + tss_100 + pts_100,
    weights = ipw,
    data = filter(main_analysis_set, tss_bucket %in% c("60 to 70"))
  ) %>%
  tidy()
# ($t(512655) = 1.61$, $p = 0.17$, $\hat{CATE} = -0.003$, 95\% CI: [-0.008, 0.001])


# Focusing on the preregistered specification (middle column of facets), we find that the overall effect on turnout (ATE) is -0.06 percentage points, with a robust standard error of 0.12 points.

# ($t(1999277) = -0.52$, $p = 0.60$, $\hat{ATE} = -0.0006$, 95\% CI: [-0.0030, 0.0017])


fit_ATE <-
  lm_robust(
    formula = voted_in_2020 ~ condition + num_times_voted + tss_100 + pts_100,
    weights = ipw,
    data = main_analysis_set) %>%
  tidy()



#  Using a very narrow equivalence range (plus or minus one-third of a percentage point), we can affirm our overall estimate is effectively equivalent to zero using the Two One-Sided Tests (TOST) procedure ($p = 0.013$)

eq_bound <- 1/300
fit_ATE |>
  filter(term == "conditiontreat") %>%
  mutate(
    p_lower = pnorm(
      estimate,
      mean = -1 * eq_bound,
      sd = std.error,
      lower.tail = FALSE
    ),
    p_upper = pnorm(
      estimate,
      mean = eq_bound,
      sd = std.error,
      lower.tail = TRUE
    )
  )

# Among registered Republicans, we find that the advertising program caused a 1 percentage point decrease in turnout (SE: 0.5 points), while among Democrats, we find a 0.8 point increase in turnout (SE: 0.5 points). 

fit_Republicans <-
  lm_robust(
    formula = voted_in_2020 ~ condition + num_times_voted + tss_100 + pts_100,
    weights = ipw,
    data = filter(main_analysis_set, party %in% c("Republican"))
  ) %>%
  tidy()

# ($t(71870) = -2.00$, $p = 0.045$, $\hat{CATE} = -0.010$, 95\% CI: [-0.0194, -0.000])


fit_Democrats <-
  lm_robust(
    formula = voted_in_2020 ~ condition + num_times_voted + tss_100 + pts_100,
    weights = ipw,
    data = filter(main_analysis_set, party %in% c("Democrat"))
  ) %>%
  tidy()
# ($t(182940) = 1.58$, $p = 0.11$, $\hat{CATE} = 0.008$, 95\% CI: [-0.002, 0.017])



# Here we see that differential effects of the program are stronger in our early voting data than in the in-person voting data. 


early_inperson <-
  lm(
    formula = cbind(voted_in_person_2020, voted_early_2020) ~ condition*tss_bucket + num_times_voted + tss_100 + pts_100,
    weights = ipw,
    data = filter(main_analysis_set, tss_bucket %in% c("30 to 40", "60 to 70"))
  ) 

out <-
  car::linearHypothesis(
    early_inperson,
    vcov = hccm(early_inperson, type = "hc2"),
    hypothesis.matrix = c(0, 0, 0, 0, 0, 0, 1),
    P = matrix(c(1, -1))
  )

print(out)

# Naive comparison
difference_in_dics <- 0.00310 - -0.0102
difference_in_dics_se_naive = sqrt(0.00167^2 + 0.00173^2)
t_naive = difference_in_dics/difference_in_dics_se_naive

# Benjamini-Hochberg correction 
all_covs <-
  c(
    'party_dem',
    'party_rep',
    'party_unknown',
    'party_other',
    'tss_30_40',
    'tss_40_50',
    'tss_50_60',
    'tss_60_70',
    'tss_100',
    'pts_100',
    "ideology_score_100",
    "partisan_score_100",
    "voted_in_2000",
    "voted_in_2002",
    "voted_in_2004",
    "voted_in_2006",
    "voted_in_2008",
    "voted_in_2010",
    "voted_in_2012",
    "voted_in_2014",
    "voted_in_2016",
    "voted_in_2018"
  )

cov_labels <-
  c(
    'Party: Democrat',
    'Party: Republican',
    'Party: Unknown',
    'Party: Other',
    'TSS: 30-40',
    'TSS: 40-50',
    'TSS: 50-60',
    'TSS: 60-70',
    'Trump support score / 100',
    'Turnout score / 100',
    "Ideology score / 100",
    "Partisanship score / 100",
    "Voted in 2000",
    "Voted in 2002",
    "Voted in 2004",
    "Voted in 2006",
    "Voted in 2008",
    "Voted in 2010",
    "Voted in 2012",
    "Voted in 2014",
    "Voted in 2016",
    "Voted in 2018"
  )

balance_regs <-
  lm_robust(formula = formula(paste0(
    "cbind(", paste0(all_covs, collapse = ", "), ") ~ condition"
  )),
  weights = ipw,
  data = main_analysis_set)

balance_df <-
  tidy(balance_regs) %>%
  filter(term == "conditiontreat") %>% 
  mutate(outcome = factor(outcome, levels = all_covs, labels = cov_labels),
         adjusted_p = p.adjust(p.value, method = "BH"))

balance_df %>%
  filter(p.value < 0.05) %>%
  select(outcome, p.value, adjusted_p)

# > balance_df %>%
#  +   filter(p.value < 0.05) %>%
#  +   select(outcome, p.value, adjusted_p)
# outcome     p.value adjusted_p
# 1                TSS: 60-70 0.009092519  0.2000354
# 2 Trump support score / 100 0.023825894  0.2620848


# F-test 

omni_fit <-
  lm_robust(formula(paste0(
    "treat_num ~", "
    party_dem + party_rep + party_unknown + 
    tss_40_50 + tss_50_60 + tss_60_70 +
    tss_100 + pts_100 +
    voted_in_2000 + voted_in_2002 + voted_in_2004 + voted_in_2006 + 
    voted_in_2008 + voted_in_2010 + voted_in_2012 + voted_in_2014 + 
    voted_in_2016 + voted_in_2018 + strata"
  )),
  data = main_analysis_set)

omni_fit_restricted <-
  lm_robust(formula(paste0(
    "treat_num ~", "strata"
  )),
  data = main_analysis_set)

waldtest(omni_fit, omni_fit_restricted, test = "F")

#Model 1: treat_num ~ party_dem + party_rep + party_unknown + tss_40_50 + 
#  tss_50_60 + tss_60_70 + tss_100 + pts_100 + voted_in_2000 + 
#  voted_in_2002 + voted_in_2004 + voted_in_2006 + voted_in_2008 + 
#  voted_in_2010 + voted_in_2012 + voted_in_2014 + voted_in_2016 + 
#  voted_in_2018 + strata
#Model 2: treat_num ~ strata
#Res.Df  Df      F Pr(>F)
#1 1999246                  
#2 1999264 -18 1.0266 0.4245


# Spending calculations

df_acronym <- read_csv("ad_library_2020_acronym.csv", col_types = c(page_id = "c"))

## Acronym spend

df_acronym %>%
  filter(!is_turnout) %>%
  summarize(spend_lb = sum(spend_lb), spend_ub = sum(spend_ub))

df_acronym %>%
  filter(!is_turnout) %>%
  group_by(ad_format) %>%
  summarize(spend_lb = sum(spend_lb), spend_ub = sum(spend_ub))

df_acronym %>%
  filter(!is_turnout) %>%
  group_by(ad_type) %>%
  summarize(spend_lb = sum(spend_lb), spend_ub = sum(spend_ub))

## Total spend
df_acronym %>%
  summarize(spend_lb = sum(spend_lb), spend_ub = sum(spend_ub))

